rm(list = ls(all = TRUE))
require(dplyr)
require(ggplot2)
require(randomForest)
require(inTrees)
set.seed(1)
data <- rbind(c("1", "1", "C0"), c("1", "0", "C1"), c("0", "1", "C1"), c("0",
"0", "C1"), c("0", "0", "C0"), c("0", "0", "C0"), c("0", "0", "C0")) %>%
as.data.frame()
colnames(data) <- c("X1", "X2", "Classs")
rf <- randomForest(Classs ~ ., data = data, keep.inbag = TRUE)
X <- data[, c("X1", "X2")]
target <- data$Classs
imp <- as.data.frame(rf$importance)
colnames(imp)[colnames(imp) == "MeanDecreaseGini"] <- "importance"
imp <- imp[order(imp$importance, decreasing = FALSE), , drop = FALSE]
imp$feature <- rownames(imp)
imp$feature <- factor(imp$feature, levels = as.character(imp$feature))
theme_set(theme_gray(base_size = 18))
ggplot(data = imp, aes(x = feature, y = importance)) + geom_bar(stat = "identity",
aes(factor(feature)), fill = "red") + theme(axis.title.y = element_blank(),
axis.text.y = element_text(hjust = 1, size = 15)) + coord_flip()

rm(list = ls(all = TRUE))
library("arules")
library("randomForest")
library("RRF")
library("inTrees")
library("reshape")
library("ggplot2")
set.seed(1)
path <- "../../data/ADNI/ADNI_baseline2.csv"
data <- read.csv(path, header = TRUE)
target_indx <- which(colnames(data) == "DX_bl")
target <- paste0("class_", as.character(data[, target_indx]))
rm_indx <- which(colnames(data) %in% c("DX_bl", "ID", "TOTAL13", "MMSCORE"))
X <- data
X <- X[, -rm_indx]
for (i in 1:ncol(X)) X[, i] <- as.factor(dicretizeVector(X[, i], K = 3))
rf <- randomForest(X, as.factor(target))
treeList <- RF2List(rf) # transform rf object to an inTrees' format
exec <- extractRules(treeList, X) # R-executable conditions
## 4555 rules (length<=6) were extracted from the first 100 trees.
class <- paste0("class_", as.character(target))
rules <- getRuleMetric(exec, X, target)
print(rules[order(as.numeric(rules[, "len"])), ][1:5, ])
## len freq err condition
## [1,] "2" "0.118" "0.098" "X[,6] %in% c('L1') & X[,11] %in% c('L1')"
## [2,] "2" "0.182" "0" "X[,4] %in% c('L1') & X[,6] %in% c('L1')"
## [3,] "2" "0.182" "0" "X[,4] %in% c('L1') & X[,6] %in% c('L1')"
## [4,] "2" "0.081" "0.024" "X[,3] %in% c('L3') & X[,4] %in% c('L3')"
## [5,] "2" "0.043" "0.136" "X[,6] %in% c('L3') & X[,7] %in% c('L3')"
## pred
## [1,] "class_1"
## [2,] "class_1"
## [3,] "class_1"
## [4,] "class_0"
## [5,] "class_0"
rules.pruned <- pruneRule(rules, X, target, maxDecay = 0.005, typeDecay = 2)
length <- data.frame(original = as.numeric(rules[, "len"]), pruned = as.numeric(rules.pruned[,
"len"]))
ggplot(melt(length), aes(value, fill = variable)) + geom_histogram(position = "dodge",
binwidth = 0.4) + ggtitle("Histogram of Lengths") + theme(plot.title = element_text(hjust = 0.5))

frequency <- data.frame(original = as.numeric(rules[, "freq"]), pruned = as.numeric(rules.pruned[,
"freq"]))
ggplot(melt(frequency), aes(value, fill = variable)) + geom_histogram(position = "dodge",
binwidth = 0.05) + ggtitle("Histogram of Frequencies") + theme(plot.title = element_text(hjust = 0.5))

error <- data.frame(original = as.numeric(rules[, "err"]), pruned = as.numeric(rules.pruned[,
"err"]))
ggplot(melt(error), aes(value, fill = variable)) + geom_histogram(position = "dodge",
binwidth = 0.01) + ggtitle("Histogram of Errors") + theme(plot.title = element_text(hjust = 0.5))

rules.pruned <- pruneRule(rules, X, target, maxDecay = 0.05, typeDecay = 1)
length <- data.frame(original = as.numeric(rules[, "len"]), pruned = as.numeric(rules.pruned[,
"len"]))
ggplot(melt(length), aes(value, fill = variable)) + geom_histogram(position = "dodge",
binwidth = 0.4) + ggtitle("Histogram of Lengths") + theme(plot.title = element_text(hjust = 0.5))

frequency <- data.frame(original = as.numeric(rules[, "freq"]), pruned = as.numeric(rules.pruned[,
"freq"]))
ggplot(melt(frequency), aes(value, fill = variable)) + geom_histogram(position = "dodge",
binwidth = 0.05) + ggtitle("Histogram of Frequencies") + theme(plot.title = element_text(hjust = 0.5))

error <- data.frame(original = as.numeric(rules[, "err"]), pruned = as.numeric(rules.pruned[,
"err"]))
ggplot(melt(error), aes(value, fill = variable)) + geom_histogram(position = "dodge",
binwidth = 0.01) + ggtitle("Histogram of Errors") + theme(plot.title = element_text(hjust = 0.5))

rules.selected <- selectRuleRRF(rules.pruned, X, target)
rules.present <- presentRules(rules.selected, colnames(X))
print(cbind(ID = 1:nrow(rules.present), rules.present[, c("condition", "pred")]))
## ID
## [1,] "1"
## [2,] "2"
## [3,] "3"
## [4,] "4"
## [5,] "5"
## [6,] "6"
## [7,] "7"
## [8,] "8"
## [9,] "9"
## [10,] "10"
## [11,] "11"
## [12,] "12"
## [13,] "13"
## [14,] "14"
## [15,] "15"
## [16,] "16"
## condition
## [1,] "FDG %in% c('L1','L2') & HippoNV %in% c('L1')"
## [2,] "FDG %in% c('L1') & HippoNV %in% c('L1','L2')"
## [3,] "PTGENDER %in% c('L2') & FDG %in% c('L2') & AV45 %in% c('L1','L2') & rs3818361 %in% c('L2') & rs3851179 %in% c('L2')"
## [4,] "AGE %in% c('L3') & FDG %in% c('L1') & HippoNV %in% c('L1','L2')"
## [5,] "PTEDUCAT %in% c('L1') & AV45 %in% c('L2') & HippoNV %in% c('L2') & rs610932 %in% c('L2')"
## [6,] "HippoNV %in% c('L1') & rs3818361 %in% c('L1')"
## [7,] "AV45 %in% c('L3') & HippoNV %in% c('L1','L2') & e4_1 %in% c('L2')"
## [8,] "AV45 %in% c('L1','L2') & HippoNV %in% c('L2') & e4_1 %in% c('L2') & rs3764650 %in% c('L1')"
## [9,] "AGE %in% c('L1') & PTGENDER %in% c('L2') & FDG %in% c('L2') & AV45 %in% c('L1','L2') & HippoNV %in% c('L1')"
## [10,] "AGE %in% c('L3') & PTGENDER %in% c('L1') & PTEDUCAT %in% c('L2') & AV45 %in% c('L1','L2')"
## [11,] "AGE %in% c('L2') & PTEDUCAT %in% c('L2','L3') & AV45 %in% c('L1','L2') & HippoNV %in% c('L2','L3') & e4_1 %in% c('L2')"
## [12,] "AGE %in% c('L2') & PTEDUCAT %in% c('L3') & e4_1 %in% c('L1')"
## [13,] "PTEDUCAT %in% c('L1','L3') & e4_1 %in% c('L1') & rs11136000 %in% c('L1') & rs610932 %in% c('L1')"
## [14,] "AGE %in% c('L2') & HippoNV %in% c('L2','L3') & rs3865444 %in% c('L1')"
## [15,] "AGE %in% c('L1','L2') & AV45 %in% c('L1')"
## [16,] "PTEDUCAT %in% c('L1','L3') & FDG %in% c('L2','L3') & HippoNV %in% c('L2','L3')"
## pred
## [1,] "class_1"
## [2,] "class_1"
## [3,] "class_0"
## [4,] "class_1"
## [5,] "class_1"
## [6,] "class_1"
## [7,] "class_1"
## [8,] "class_0"
## [9,] "class_0"
## [10,] "class_0"
## [11,] "class_0"
## [12,] "class_0"
## [13,] "class_0"
## [14,] "class_0"
## [15,] "class_0"
## [16,] "class_0"
print(cbind(ID = 1:nrow(rules.present), rules.present[, c("len", "freq", "err")]))
## ID len freq err
## [1,] "1" "2" "0.279" "0.083"
## [2,] "2" "2" "0.279" "0.09"
## [3,] "3" "5" "0.029" "0.133"
## [4,] "4" "3" "0.122" "0.016"
## [5,] "5" "4" "0.031" "0.312"
## [6,] "6" "2" "0.207" "0.121"
## [7,] "7" "3" "0.172" "0.124"
## [8,] "8" "4" "0.06" "0.194"
## [9,] "9" "5" "0.006" "0"
## [10,] "10" "4" "0.044" "0.13"
## [11,] "11" "5" "0.019" "0.2"
## [12,] "12" "3" "0.043" "0.182"
## [13,] "13" "4" "0.037" "0.158"
## [14,] "14" "3" "0.114" "0.203"
## [15,] "15" "2" "0.234" "0.215"
## [16,] "16" "3" "0.282" "0.144"
freqPattern <- getFreqPattern(rules.pruned)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.5 0.1 1 none FALSE TRUE 5 0.01 1
## maxlen target ext
## 4 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 45
##
## set item appearances ...[2 item(s)] done [0.00s].
## set transactions ...[52 item(s), 4555 transaction(s)] done [0.00s].
## sorting and recoding items ... [50 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 done [0.00s].
## writing ... [93 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
top.pattern <- (freqPattern[which(as.numeric(freqPattern[, "len"]) >= 2), ][1:5,
])
print(presentRules(top.pattern, colnames(X)))
## len sup conf
## [1,] "2" "0.038" "1"
## [2,] "2" "0.026" "1"
## [3,] "2" "0.023" "0.991"
## [4,] "2" "0.022" "0.953"
## [5,] "2" "0.021" "0.99"
## condition pred
## [1,] "FDG %in% c('L2','L3') & HippoNV %in% c('L2','L3')" "class_0"
## [2,] "AV45 %in% c('L1','L2') & HippoNV %in% c('L2','L3')" "class_0"
## [3,] "HippoNV %in% c('L1') & rs3818361 %in% c('L1')" "class_1"
## [4,] "AV45 %in% c('L3') & HippoNV %in% c('L1')" "class_1"
## [5,] "rs610932 %in% c('L1') & HippoNV %in% c('L2','L3')" "class_0"
learner <- buildLearner(rules.selected, X, target)
learner.readable <- presentRules(learner, colnames(X))
print(cbind(ID = 1:nrow(learner.readable), learner.readable[, c("condition",
"pred")]))
## ID
## [1,] "1"
## [2,] "2"
## [3,] "3"
## [4,] "4"
## [5,] "5"
## [6,] "6"
## [7,] "7"
## [8,] "8"
## [9,] "9"
## [10,] "10"
## [11,] "11"
## [12,] "12"
## condition
## [1,] "AGE %in% c('L3') & FDG %in% c('L1') & HippoNV %in% c('L1','L2')"
## [2,] "FDG %in% c('L1','L2') & HippoNV %in% c('L1')"
## [3,] "AGE %in% c('L2') & PTEDUCAT %in% c('L3') & e4_1 %in% c('L1')"
## [4,] "PTEDUCAT %in% c('L1','L3') & e4_1 %in% c('L1') & rs11136000 %in% c('L1') & rs610932 %in% c('L1')"
## [5,] "AGE %in% c('L3') & PTGENDER %in% c('L1') & PTEDUCAT %in% c('L2') & AV45 %in% c('L1','L2')"
## [6,] "PTGENDER %in% c('L2') & FDG %in% c('L2') & AV45 %in% c('L1','L2') & rs3818361 %in% c('L2') & rs3851179 %in% c('L2')"
## [7,] "PTEDUCAT %in% c('L1','L3') & FDG %in% c('L2','L3') & HippoNV %in% c('L2','L3')"
## [8,] "AV45 %in% c('L1','L2') & HippoNV %in% c('L2') & e4_1 %in% c('L2') & rs3764650 %in% c('L1')"
## [9,] "FDG %in% c('L1') & HippoNV %in% c('L1','L2')"
## [10,] "AGE %in% c('L2') & HippoNV %in% c('L2','L3') & rs3865444 %in% c('L1')"
## [11,] "AGE %in% c('L1','L2') & AV45 %in% c('L1')"
## [12,] "Else"
## pred
## [1,] "class_1"
## [2,] "class_1"
## [3,] "class_0"
## [4,] "class_0"
## [5,] "class_0"
## [6,] "class_0"
## [7,] "class_0"
## [8,] "class_0"
## [9,] "class_1"
## [10,] "class_0"
## [11,] "class_0"
## [12,] "class_0"
print(cbind(ID = 1:nrow(learner.readable), learner.readable[, c("len", "freq",
"err")]))
## ID len freq err
## [1,] "1" "3" "0.121856866537718" "0.0158730158730159"
## [2,] "2" "2" "0.195357833655706" "0.118811881188119"
## [3,] "3" "3" "0.034816247582205" "0.0555555555555556"
## [4,] "4" "4" "0.02321083172147" "0.0833333333333334"
## [5,] "5" "4" "0.0367504835589942" "0.105263157894737"
## [6,] "6" "5" "0.0154738878143133" "0.125"
## [7,] "7" "3" "0.2321083172147" "0.158333333333333"
## [8,] "8" "4" "0.0212765957446809" "0.181818181818182"
## [9,] "9" "2" "0.0328820116054159" "0.0588235294117647"
## [10,] "10" "3" "0.0425531914893617" "0.181818181818182"
## [11,] "11" "2" "0.0851063829787234" "0.204545454545455"
## [12,] "12" "1" "0.158607350096712" "0.317073170731707"